{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e0eece77-cc70-49c1-ad18-d67f5c985d54",
   "metadata": {},
   "outputs": [],
   "source": [
    "import ibis\n",
    "\n",
    "conn = ibis.duckdb.connect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "253e2f55-d38a-40a8-9647-a92b8873215c",
   "metadata": {},
   "outputs": [],
   "source": [
    "licenses = conn.read_csv(\n",
    "    \"../chapter_09/NYC_Dog_Licensing_Dataset.csv\",\n",
    "    ignore_errors=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b909579-aea2-43ae-9c13-524f97083b93",
   "metadata": {},
   "outputs": [],
   "source": [
    "from ibis import _"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1ff5769c-6af2-4d64-86e8-23abf25b1921",
   "metadata": {},
   "outputs": [],
   "source": [
    "distinct_cols = [\n",
    "    \"AnimalName\", \"AnimalGender\", \"AnimalBirthYear\", \n",
    "    \"BreedName\", \"ZipCode\"\n",
    "]\n",
    "\n",
    "unique_dogs = licenses[distinct_cols].filter(\n",
    "    _.AnimalName.notin(\n",
    "        [\"UNKNOWN\", \"NAME NOT PROVIDED\", \"NAME\", \"NONE\"]\n",
    "    )\n",
    ").filter(\n",
    "    _.BreedName.notin([\"Unknown\", \"Not Provided\"])\n",
    ").distinct()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fac330ef-d46d-42a0-880a-035673e17306",
   "metadata": {},
   "outputs": [],
   "source": [
    "dogs_by_year = unique_dogs.aggregate(\n",
    "    by=_.AnimalBirthYear, \n",
    "    Count=_.count()\n",
    ").order_by(\n",
    "    ibis.desc(\"AnimalBirthYear\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "020ea47e-55b2-4622-a0cb-7b08eb20bfe6",
   "metadata": {},
   "outputs": [],
   "source": [
    "dogs_by_year = unique_dogs.group_by(\n",
    "    \"AnimalBirthYear\"\n",
    ").aggregate(\n",
    "    Count=_.count()\n",
    ").order_by(\n",
    "    ibis.desc(\"AnimalBirthYear\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "926af4e0-5078-4410-a6d0-4c8957f31905",
   "metadata": {},
   "outputs": [],
   "source": [
    "ibis.options.interactive = True\n",
    "\n",
    "dogs_by_year.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa97a627-a882-4a9b-bfdd-347ee8fc4587",
   "metadata": {},
   "outputs": [],
   "source": [
    "dogs_by_year_df = dogs_by_year.to_pandas()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
